STRATAA microbiome
analysis
Imports
library(kableExtra)
library(ggplot2)
library(ggpubr)
library(patchwork)
library(vegan)
Sources
The file handles are set in config.R as they’re used by both this
script and data_cleaning.
source("/Users/flashton/Dropbox/GordonGroup/STRATAA_Microbiome/from_Leo/Leonardos_analysis/bin/core_functions.R")
source("/Users/flashton/Dropbox/GordonGroup/STRATAA_Microbiome/from_Leo/Leonardos_analysis/bin/config.R")
Metadata basics
First do the plots with kruskall wallis comparison between groups to
see if there’s an overall difference, then do the pairwise tests because
KW says there is a difference.
metadata <- read_metadata(metadata_handle)
metadata <- metadata %>% mutate(Group = if_else(Group == 'Control_HealthySerosurvey', 'Healthy Control', Group))
metadata <- metadata %>% mutate(Group = if_else(Group == 'Acute_Typhi', 'Acute typhoid', Group))
number_per_country <- metadata %>% group_by(Country) %>% summarise(count = n())
number_per_country %>% kbl() %>% kable_styling()
|
Country
|
count
|
|
Bangladesh
|
120
|
|
Malawi
|
114
|
|
Nepal
|
76
|
print(sum(number_per_country$count))
## [1] 310
metadata %>% group_by(Group) %>% summarise(count = n()) %>% kbl() %>% kable_styling()
|
Group
|
count
|
|
Acute typhoid
|
103
|
|
Carrier
|
110
|
|
Healthy Control
|
97
|
baseline_chars <- get_baseline_characteristics(metadata)
baseline_chars$pct_anti
## [1] 37.50000 0.00000 0.00000 26.47059 0.00000 0.00000 44.82759 0.00000
## [9] 0.00000
baseline_chars$pct_anti %>% na_if(0)
## [1] 37.50000 NA NA 26.47059 NA NA 44.82759 NA
## [9] NA
# BEWARE - na_if(0.0) will convert ALL 0s to NAs, this is ok as they're only in the antibiotics of carriers and controls
# at the moment, but if others get added in, will screw with it.
baseline_chars %>% rename(`Median age` = median_age, `Women (%)` = pct_fem, `Antibiotics in last 2 weeks (%)` = pct_anti) %>% pivot_longer(!c(Country, Group)) %>% rename(variable_name = name) %>% pivot_wider(names_from = Group, values_from = value) %>% filter(variable_name != 'number') %>% na_if(0.0) %>% kbl(digits = c(NA, NA, 1, 1, 1)) %>% kable_styling()
|
Country
|
variable_name
|
Acute typhoid
|
Carrier
|
Healthy Control
|
|
Bangladesh
|
Median age
|
6.0
|
37.0
|
28.5
|
|
Bangladesh
|
Women (%)
|
47.5
|
47.5
|
65.0
|
|
Bangladesh
|
Antibiotics in last 2 weeks (%)
|
37.5
|
NA
|
NA
|
|
Malawi
|
Median age
|
9.7
|
32.0
|
24.0
|
|
Malawi
|
Women (%)
|
64.7
|
57.5
|
65.0
|
|
Malawi
|
Antibiotics in last 2 weeks (%)
|
26.5
|
NA
|
NA
|
|
Nepal
|
Median age
|
17.2
|
43.9
|
35.0
|
|
Nepal
|
Women (%)
|
24.1
|
66.7
|
82.4
|
|
Nepal
|
Antibiotics in last 2 weeks (%)
|
44.8
|
NA
|
NA
|
ggplot(metadata, aes(x = Country, y = Age, fill = Group)) + geom_boxplot() + stat_compare_means(method = 'kruskal.test', label = "p")

comparisons_groups <- list(c("Acute typhoid", "Carrier"), c("Acute typhoid", "Healthy Control"), c("Carrier", "Healthy Control"))
comparisons_countries <- list(c('Bangladesh', 'Malawi'), c('Bangladesh', 'Nepal'), c('Malawi', 'Nepal'))
# default stat method when doing pairwise tests in wilcoxon.
ggboxplot(metadata, facet.by = "Country", y = "Age", x = "Group", color = "Group") + stat_compare_means(comparisons = comparisons_groups) + rremove("x.text") + rremove("xlab") + rremove("x.ticks") # +rotate_x_text(angle = 45)

ggboxplot(metadata, facet.by = "Group", y = "Age", x = "Country", color = "Country") + stat_compare_means(comparisons = comparisons_countries) + rotate_x_text(angle = 45)

country_group_sex <- metadata %>% group_by(Country, Group, Sex) %>% summarise(count = n())
plot_sex <- function(eg1, c){
d <- eg1 %>% filter(Country == c)
p <- ggplot(d, aes(x = Group, y = count, fill = Sex)) +
geom_bar(stat ='identity', position = 'fill') +
ylab('Proportion') +
ggtitle(c)# +
#theme(axis.text=element_text(size=34), axis.title=element_text(size=36,face="bold"), plot.title = element_text(size = 40, face = "bold"), legend.key.size = unit(4, 'cm'), legend.title = element_text(size = 34), legend.text = element_text(size = 28))
return(p)
}
m_sex <- plot_sex(country_group_sex, 'Malawi')
b_sex <- plot_sex(country_group_sex, 'Bangladesh')
n_sex <- plot_sex(country_group_sex, 'Nepal')
m_sex / b_sex / n_sex

Alpha diversity
meta_for_alpha <- metadata %>% select(isolate, Group, Sex, Country, Antibiotics_taken_before_sampling_yes_no_assumptions, age_bracket)
all three
ANOVA results
anova_all_three_csgaa_res <- read_delim(file.path(output_folder_all_three, '3_alpha', 'shannon.anova.results.tsv'), delim = '\t')
anova_all_three_csgaa_res %>% kbl %>% kable_styling()
|
rownames(anova_res[[1]])
|
Df
|
Sum.Sq
|
Mean.Sq
|
F.value
|
Pr..F.
|
is_it_significant
|
|
Country:Group
|
4
|
11.0251459
|
2.7562865
|
8.4327318
|
0.0000022
|
significant
|
|
Group
|
2
|
6.5972886
|
3.2986443
|
10.0920507
|
0.0000615
|
significant
|
|
Country
|
2
|
6.1559033
|
3.0779517
|
9.4168517
|
0.0001149
|
significant
|
|
Country:Antibiotics_taken_before_sampling_yes_no_assumptions
|
2
|
3.2718745
|
1.6359372
|
5.0050748
|
0.0074085
|
significant
|
|
Country:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
3
|
3.7981561
|
1.2660520
|
3.8734281
|
0.0098608
|
significant
|
|
Country:Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
2
|
2.6171373
|
1.3085686
|
4.0035056
|
0.0194634
|
not_significant
|
|
Country:Sex
|
2
|
1.4229022
|
0.7114511
|
2.1766519
|
0.1156181
|
not_significant
|
|
Sex:Group
|
2
|
1.1109702
|
0.5554851
|
1.6994811
|
0.1849346
|
not_significant
|
|
age_bracket
|
3
|
1.5899600
|
0.5299867
|
1.6214699
|
0.1849599
|
not_significant
|
|
Sex:Group:age_bracket
|
3
|
1.5836426
|
0.5278809
|
1.6150272
|
0.1864556
|
not_significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
2
|
1.0007596
|
0.5003798
|
1.5308890
|
0.2184138
|
not_significant
|
|
Country:Group:age_bracket
|
4
|
1.6901944
|
0.4225486
|
1.2927680
|
0.2734354
|
not_significant
|
|
Country:Sex:age_bracket
|
3
|
1.1382017
|
0.3794006
|
1.1607586
|
0.3253660
|
not_significant
|
|
Country:Sex:Group
|
4
|
1.4524154
|
0.3631039
|
1.1108995
|
0.3519447
|
not_significant
|
|
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
3
|
0.8900115
|
0.2966705
|
0.9076498
|
0.4379545
|
not_significant
|
|
Group:age_bracket
|
4
|
1.0062431
|
0.2515608
|
0.7696387
|
0.5458834
|
not_significant
|
|
Sex
|
1
|
0.1057328
|
0.1057328
|
0.3234845
|
0.5700442
|
not_significant
|
|
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
2
|
0.3545009
|
0.1772504
|
0.5422896
|
0.5821146
|
not_significant
|
|
Sex:age_bracket
|
3
|
0.5046598
|
0.1682199
|
0.5146611
|
0.6725503
|
not_significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
0.0243682
|
0.0243682
|
0.0745533
|
0.7850500
|
not_significant
|
|
Country:age_bracket
|
4
|
0.5471136
|
0.1367784
|
0.4184672
|
0.7952587
|
not_significant
|
|
Residuals
|
244
|
79.7527911
|
0.3268557
|
NA
|
NA
|
NA
|
Plots
all_three_shannon_alpha <- read_delim(file.path(output_folder_all_three, '3_alpha', 'shannon.alpha_results.tsv'))
all_three_shannon_alpha <- left_join(all_three_shannon_alpha, meta_for_alpha, by = 'isolate', )
all_three_shannon_alpha <- all_three_shannon_alpha %>% mutate(Group = if_else(Group == 'Control_HealthySerosurvey', 'Healthy Control', Group))
all_three_shannon_alpha <- all_three_shannon_alpha %>% mutate(Group = if_else(Group == 'Acute_Typhi', 'Acute typhoid', Group)) %>% arrange(Country)
country_comps <- list(c('Bangladesh', 'Malawi'), c('Malawi', 'Nepal'), c('Bangladesh', 'Nepal'))
ggplot(all_three_shannon_alpha, aes(x=Country, y=alpha)) +
ggtitle("By country") +
labs(x="", y="Alpha diversity") +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), axis.text.y = element_text(size = 12), axis.title = element_text(size = 14, face = 'bold')) +
stat_compare_means(size = 4, label = "p.format", comparisons = country_comps)

group_comps <- list(c(1, 2), c(2, 3), c(1, 3))
ggplot(all_three_shannon_alpha, aes(x=Group, y=alpha)) +
ggtitle("By group") +
labs(x="", y="Alpha diversity") +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12), axis.text.y = element_text(size = 12), axis.title = element_text(size = 14, face = 'bold')) +
stat_compare_means(size = 4, label = "p.format", comparisons = group_comps)

#geom_point(alpha = 0.3, position = "jitter")
country_group_comps <- list(c('Acute typhoid', 'Carrier'), c('Acute typhoid', 'Healthy Control'), c('Healthy Control', 'Carrier'))
ggboxplot(all_three_shannon_alpha, facet.by = "Country", y = "alpha", x = "Group", color = "Group") + stat_compare_means(comparisons = country_group_comps, label = 'p.signif') + rremove("x.text") + rremove("xlab") + rremove("x.ticks") # +rotate_x_text(angle = 45)

ggboxplot(all_three_shannon_alpha, facet.by = "Group", y = "alpha", x = "Country", color = "Country") + stat_compare_means(comparisons = country_comps) + rremove("x.text") + rremove("xlab") + rremove("x.ticks") # +rotate_x_text(angle = 45)

antibiotic_comps <- list(c('Yes', 'No'))
ggboxplot(all_three_shannon_alpha, facet.by = "Country", y = "alpha", x = "Antibiotics_taken_before_sampling_yes_no_assumptions", color = "Antibiotics_taken_before_sampling_yes_no_assumptions", legend.title = 'Antibiotics') + stat_compare_means(comparisons = antibiotic_comps) + rremove("x.text") + rremove("xlab") + rremove("x.ticks")

blantyre
ANOVA
anova_blantyre_sgaa_res <- read_delim(file.path(output_folder_blantyre, '3_alpha', 'shannon.anova.results.tsv'), delim = '\t')
anova_blantyre_sgaa_res %>% kbl %>% kable_styling()
|
rownames(anova_res[[1]])
|
Df
|
Sum.Sq
|
Mean.Sq
|
F.value
|
Pr..F.
|
is_it_significant
|
|
Group
|
2
|
7.9649377
|
3.9824688
|
13.3520584
|
0.0000082
|
significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
4.6566930
|
4.6566930
|
15.6125356
|
0.0001533
|
significant
|
|
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
2
|
2.9475255
|
1.4737628
|
4.9410974
|
0.0091796
|
significant
|
|
Sex:Group
|
2
|
2.1447212
|
1.0723606
|
3.5953128
|
0.0314193
|
not_significant
|
|
Sex:Group:age_bracket
|
2
|
2.1337791
|
1.0668896
|
3.5769700
|
0.0319581
|
not_significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
1.2400593
|
1.2400593
|
4.1575578
|
0.0443500
|
not_significant
|
|
age_bracket
|
2
|
1.0530543
|
0.5265272
|
1.7652923
|
0.1769464
|
not_significant
|
|
Group:age_bracket
|
2
|
0.7980604
|
0.3990302
|
1.3378321
|
0.2675253
|
not_significant
|
|
Sex
|
1
|
0.0956136
|
0.0956136
|
0.3205644
|
0.5726623
|
not_significant
|
|
Sex:age_bracket
|
2
|
0.2468792
|
0.1234396
|
0.4138570
|
0.6623334
|
not_significant
|
|
Residuals
|
91
|
27.1422319
|
0.2982663
|
NA
|
NA
|
NA
|
Plots
- The p-value on the graph is a kruskal wallis test (which is just
wilcoxon i think?)
blantyre_shannon_alpha <- read_delim(file.path(output_folder_blantyre, '3_alpha', 'shannon.alpha_results.tsv'))
blantyre_shannon_alpha <- left_join(blantyre_shannon_alpha, meta_for_alpha, by = 'isolate')
my_comparisons <- list(c('No', 'Yes'), c('No', 'Yes'), c('No', 'Yes'))
ggplot(blantyre_shannon_alpha, aes(x=age_bracket, y=alpha, fill = Antibiotics_taken_before_sampling_yes_no_assumptions)) +
#ggtitle("All three countries - country by group") +
labs(x="Age bracket", y="Alpha diversity", fill='Antibiotics', color = 'Antibiotics') +
geom_boxplot(outlier.shape = NA, alpha = 0.2) +
#geom_point(aes(color=as.factor(Antibiotics_taken_before_sampling_yes_no_assumptions)), alpha = 0.3) +
geom_point(alpha = 0.5, position=position_dodge(width=0.75),aes(group=Antibiotics_taken_before_sampling_yes_no_assumptions, color=as.factor(Antibiotics_taken_before_sampling_yes_no_assumptions))) +
stat_compare_means(label.y = c(5.2, 5.5, 5.7), size = 4, label = "p.format")

dhaka
anova_dhaka_sgaa_res <- read_delim(file.path(output_folder_dhaka, '3_alpha', 'shannon.anova.results.tsv'), delim = '\t')
anova_dhaka_sgaa_res %>% kbl %>% kable_styling()
|
rownames(anova_res[[1]])
|
Df
|
Sum.Sq
|
Mean.Sq
|
F.value
|
Pr..F.
|
is_it_significant
|
|
Group
|
2
|
4.5622912
|
2.2811456
|
6.0661407
|
0.0033163
|
significant
|
|
Sex
|
1
|
1.4283133
|
1.4283133
|
3.7982448
|
0.0542564
|
not_significant
|
|
Sex:Group:age_bracket
|
1
|
0.3891949
|
0.3891949
|
1.0349672
|
0.3115788
|
not_significant
|
|
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
0.3424352
|
0.3424352
|
0.9106215
|
0.3423714
|
not_significant
|
|
age_bracket
|
3
|
1.0032423
|
0.3344141
|
0.8892914
|
0.4496800
|
not_significant
|
|
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
3
|
0.8659569
|
0.2886523
|
0.7675992
|
0.5149717
|
not_significant
|
|
Sex:age_bracket
|
3
|
0.8635489
|
0.2878496
|
0.7654646
|
0.5161812
|
not_significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
0.1407568
|
0.1407568
|
0.3743078
|
0.5421269
|
not_significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
0.1191690
|
0.1191690
|
0.3169003
|
0.5748026
|
not_significant
|
|
Group:age_bracket
|
4
|
0.7582631
|
0.1895658
|
0.5041032
|
0.7327849
|
not_significant
|
|
Sex:Group
|
2
|
0.2172723
|
0.1086362
|
0.2888909
|
0.7497496
|
not_significant
|
|
Residuals
|
95
|
35.7243333
|
0.3760456
|
NA
|
NA
|
NA
|
kathmandu
anova_kathmandu_sgaa_res <- read_delim(file.path(output_folder_kathmandu, '3_alpha', 'shannon.anova.results.tsv'), delim = '\t')
anova_kathmandu_sgaa_res %>% kbl %>% kable_styling()
|
rownames(anova_res[[1]])
|
Df
|
Sum.Sq
|
Mean.Sq
|
F.value
|
Pr..F.
|
is_it_significant
|
|
Group
|
2
|
2.4148394
|
1.2074197
|
3.6926494
|
0.0309377
|
not_significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
1.2170738
|
1.2170738
|
3.7221746
|
0.0585915
|
not_significant
|
|
Group:age_bracket
|
2
|
0.9025969
|
0.4512985
|
1.3802052
|
0.2596639
|
not_significant
|
|
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
0.2589283
|
0.2589283
|
0.7918800
|
0.3772085
|
not_significant
|
|
Sex
|
1
|
0.0699282
|
0.0699282
|
0.2138614
|
0.6454878
|
not_significant
|
|
age_bracket
|
2
|
0.1213784
|
0.0606892
|
0.1856057
|
0.8310924
|
not_significant
|
|
Sex:age_bracket
|
1
|
0.0090645
|
0.0090645
|
0.0277218
|
0.8683436
|
not_significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
2
|
0.0601359
|
0.0300680
|
0.0919568
|
0.9122773
|
not_significant
|
|
Sex:Group
|
2
|
0.0545249
|
0.0272624
|
0.0833767
|
0.9201146
|
not_significant
|
|
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
1
|
0.0029275
|
0.0029275
|
0.0089531
|
0.9249420
|
not_significant
|
|
Residuals
|
58
|
18.9647959
|
0.3269792
|
NA
|
NA
|
NA
|
Beta diversity
Information on interpretation of PERMANOVA R2 from here - https://forum.qiime2.org/t/group-significance-changes-and-interpretation/2396
, some info on the p-values here https://forum.qiime2.org/t/understanding-beta-group-significance-permanova-results/12648
run_plot_beta <- function(output_folder, meta, vars_to_plot){
pcoa.data.handle <- file.path(output_folder, '4_beta', "first_2d_coords.txt")
pcoa.var.handle <- file.path(output_folder, '4_beta', "pcoa_var.txt")
pcoa.data <- read_delim(pcoa.data.handle, delim = " ", escape_double = FALSE, trim_ws = TRUE)
#pcoa.data <- pcoa.data %>% separate(col = Y, sep = '\t', into = c('x', 'y')) %>% rename(ID = X)
pcoa.var <- read_delim(pcoa.var.handle, delim = " ", escape_double = FALSE, trim_ws = TRUE)
#pcoa.data <- read_tsv(pcoa.data.handle)
meta_for_beta <- meta %>% select(isolate, Country, Group, Sex, age_bracket, Antibiotics_taken_before_sampling_yes_no_assumptions, group_country, group_antibiotic)
pcoa.data <- left_join(pcoa.data, meta_for_beta, by = c('Sample' = 'isolate'))
# add some different plots to the below
# or, change this function to take the variable to colour by
plots <- plot_beta(pcoa.data, pcoa.var, vars_to_plot)
return(plots)
}
All three
countries
Plots
all_three_vars_to_plot <- c('Country', 'Group', 'Sex', 'age_bracket', 'group_country', 'group_antibiotic')
all_three_plots <- run_plot_beta(output_folder_all_three, metadata, all_three_vars_to_plot)
a3_c <- all_three_plots$country_plot
a3_c

a3_g <- all_three_plots$group_plot
a3_g

all_three_plots$sex_plot

all_three_plots$age_plot

all_three_plots$group_country_plot

all_three_plots$group_antibiotic_plot

a3_c / a3_g

PERMANOVA
pn_all3_csgaa_res <- read_delim(file.path(output_folder_all_three, '4_beta', 'permanova.results.tsv'))
pn_all3_csgaa_res %>% kbl %>% kable_styling()
|
variable
|
R2
|
Pr..F.
|
is_it_significant
|
|
Country:Group
|
0.1082441
|
0.0009990
|
significant
|
|
Country
|
0.0994852
|
0.0009990
|
significant
|
|
Group
|
0.0804962
|
0.0009990
|
significant
|
|
age_bracket
|
0.0246847
|
0.0009990
|
significant
|
|
Country:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0119992
|
0.0009990
|
significant
|
|
Country:Sex
|
0.0087463
|
0.0039960
|
significant
|
|
Country:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0080082
|
0.0059940
|
significant
|
|
Sex
|
0.0048251
|
0.0099900
|
significant
|
|
Country:age_bracket
|
0.0135873
|
0.0119880
|
not_significant
|
|
Country:Group:age_bracket
|
0.0112673
|
0.0869131
|
not_significant
|
|
Country:Sex:Group
|
0.0109128
|
0.1048951
|
not_significant
|
|
Group:age_bracket
|
0.0094648
|
0.3416583
|
not_significant
|
|
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0078747
|
0.1878122
|
not_significant
|
|
Sex:age_bracket
|
0.0078018
|
0.1958042
|
not_significant
|
|
Country:Sex:age_bracket
|
0.0074308
|
0.2857143
|
not_significant
|
|
Sex:Group:age_bracket
|
0.0071928
|
0.3376623
|
not_significant
|
|
Country:Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0070626
|
0.0189810
|
not_significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0066436
|
0.0539461
|
not_significant
|
|
Sex:Group
|
0.0057893
|
0.1048951
|
not_significant
|
|
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0054386
|
0.1788212
|
not_significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0023788
|
0.3516484
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residuals
|
0.5506660
|
NA
|
NA
|
Just Blantyre
Plots
blantyre_vars_to_plot <- c('Group', 'Sex', 'age_bracket', 'group_antibiotic')
blantyre_plots <- run_plot_beta(output_folder_blantyre, metadata, all_three_vars_to_plot)
b_g <- blantyre_plots$group_plot
b_g

blantyre_plots$sex_plot

blantyre_plots$age_plot

blantyre_plots$group_antibiotic_plot

PERMANOVA
pn_blantyre_sgaa_res <- read_delim(file.path(output_folder_blantyre, '4_beta', 'permanova.results.tsv'))
pn_blantyre_sgaa_res %>% kbl %>% kable_styling()
|
variable
|
R2
|
Pr..F.
|
is_it_significant
|
|
Group
|
0.2450967
|
0.0009990
|
significant
|
|
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0404489
|
0.0009990
|
significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0213950
|
0.0039960
|
significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0156828
|
0.0079920
|
significant
|
|
age_bracket
|
0.0248784
|
0.0119880
|
not_significant
|
|
Sex:Group
|
0.0195954
|
0.0309690
|
not_significant
|
|
Sex:age_bracket
|
0.0191798
|
0.0709291
|
not_significant
|
|
Group:age_bracket
|
0.0182757
|
0.0859141
|
not_significant
|
|
Sex:Group:age_bracket
|
0.0179867
|
0.0889111
|
not_significant
|
|
Sex
|
0.0095135
|
0.0959041
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residuals
|
0.5679472
|
NA
|
NA
|
Just Dhaka
Plots
dhaka_vars_to_plot <- c('Group', 'Sex', 'age_bracket', 'group_antibiotic')
dhaka_plots <- run_plot_beta(output_folder_dhaka, metadata, all_three_vars_to_plot)
d_g <- dhaka_plots$group_plot
d_g

dhaka_plots$sex_plot

dhaka_plots$age_plot

dhaka_plots$group_antibiotic_plot

PERMANOVA
pn_dhaka_sgaa_res <- read_delim(file.path(output_folder_dhaka, '4_beta', 'permanova.results.tsv'))
pn_dhaka_sgaa_res %>% kbl %>% kable_styling()
|
variable
|
R2
|
Pr..F.
|
is_it_significant
|
|
Group
|
0.2850528
|
0.0009990
|
significant
|
|
age_bracket
|
0.0378323
|
0.0009990
|
significant
|
|
Group:age_bracket
|
0.0228223
|
0.5874126
|
not_significant
|
|
Sex:age_bracket
|
0.0171320
|
0.5414585
|
not_significant
|
|
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0162455
|
0.6623377
|
not_significant
|
|
Sex:Group
|
0.0133998
|
0.2627373
|
not_significant
|
|
Sex
|
0.0105260
|
0.0489510
|
not_significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0076070
|
0.1938062
|
not_significant
|
|
Sex:Group:age_bracket
|
0.0072387
|
0.2297702
|
not_significant
|
|
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0072303
|
0.2147852
|
not_significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0054526
|
0.5494505
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residuals
|
0.5694607
|
NA
|
NA
|
Just Kathmandu
Plots
kathmandu_vars_to_plot <- c('Group', 'Sex', 'age_bracket', 'group_antibiotic')
kathmandu_plots <- run_plot_beta(output_folder_kathmandu, metadata, all_three_vars_to_plot)
k_g <- kathmandu_plots$group_plot
k_g

kathmandu_plots$sex_plot

kathmandu_plots$age_plot

kathmandu_plots$group_antibiotic_plot

PERMANOVA
pn_kathmandu_sgaa_res <- read_delim(file.path(output_folder_kathmandu, '4_beta', 'permanova.results.tsv'))
pn_kathmandu_sgaa_res %>% kbl %>% kable_styling()
|
variable
|
R2
|
Pr..F.
|
is_it_significant
|
|
Group
|
0.0771814
|
0.0009990
|
significant
|
|
age_bracket
|
0.0333476
|
0.1328671
|
not_significant
|
|
Sex:Group
|
0.0288279
|
0.2887113
|
not_significant
|
|
Group:age_bracket
|
0.0287356
|
0.3116883
|
not_significant
|
|
Sex
|
0.0207446
|
0.0509491
|
not_significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0195485
|
0.8801199
|
not_significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0125371
|
0.4835165
|
not_significant
|
|
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0120137
|
0.5194805
|
not_significant
|
|
Sex:age_bracket
|
0.0103350
|
0.7102897
|
not_significant
|
|
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0072818
|
0.9710290
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residuals
|
0.7494468
|
NA
|
NA
|
Blantyre and
Dhaka
Plots
blantyre_dhaka_vars_to_plot <- c('Group', 'Sex', 'age_bracket', 'group_antibiotic')
blantyre_dhaka_plots <- run_plot_beta(output_folder_blantyre_dhaka, metadata, all_three_vars_to_plot)
blantyre_dhaka_plots$group_plot

blantyre_dhaka_plots$sex_plot

blantyre_dhaka_plots$age_plot

blantyre_dhaka_plots$group_antibiotic_plot

PERMANOVA
pn_blantyre_dhaka_csgaa_res <- read_delim(file.path(output_folder_blantyre_dhaka, '4_beta', 'permanova.results.tsv'))
pn_blantyre_dhaka_csgaa_res %>% kbl %>% kable_styling()
|
variable
|
R2
|
Pr..F.
|
significant
|
|
Group
|
0.1540878
|
0.0009990
|
significant
|
|
Country
|
0.1057172
|
0.0009990
|
significant
|
|
Country:Group
|
0.0785701
|
0.0009990
|
significant
|
|
age_bracket
|
0.0279396
|
0.0009990
|
significant
|
|
age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0132086
|
0.0059940
|
significant
|
|
Country:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0095551
|
0.0059940
|
significant
|
|
Country:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0068922
|
0.0039960
|
significant
|
|
Country:Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0057642
|
0.0099900
|
significant
|
|
Group:age_bracket
|
0.0120929
|
0.2737263
|
not_significant
|
|
Sex:age_bracket
|
0.0099415
|
0.1378621
|
not_significant
|
|
Sex:Group:age_bracket
|
0.0094396
|
0.2027972
|
not_significant
|
|
Country:age_bracket
|
0.0086514
|
0.0339660
|
not_significant
|
|
Sex:Group
|
0.0080736
|
0.0459540
|
not_significant
|
|
Country:Group:age_bracket
|
0.0070425
|
0.1388611
|
not_significant
|
|
Country:Sex:Group
|
0.0069062
|
0.1448551
|
not_significant
|
|
Country:Sex:age_bracket
|
0.0067914
|
0.1658342
|
not_significant
|
|
Sex
|
0.0054105
|
0.0269730
|
not_significant
|
|
Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0045536
|
0.0599401
|
not_significant
|
|
Sex:age_bracket:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0037801
|
0.1338661
|
not_significant
|
|
Sex:Antibiotics_taken_before_sampling_yes_no_assumptions
|
0.0033876
|
0.2027972
|
not_significant
|
|
Country:Sex
|
0.0033118
|
0.2237762
|
not_significant
|
|
Total
|
1.0000000
|
NA
|
NA
|
|
Residuals
|
0.5088827
|
NA
|
NA
|
Combined plots
It looks like some might be being trimmed by the coordinate limits in
the plot_beta function, but they’re not (i’ve checked both blantyre and
dhaka).
#b_g / d_g | k_g / plot_spacer() + plot_layout(guides = 'collect')
b_g + d_g + k_g + guide_area() + plot_layout(ncol = 2, guides = 'collect')

Taxa associated with
participant groups
combined_output_root <- '/Users/flashton/Dropbox/GordonGroup/STRATAA_Microbiome/from_leo/phil/2022.11.08/combined_analysis'
if (!dir.exists(combined_output_root)){ dir.create(combined_output_root) }
groups_for_comparison <- c("Control_HealthySerosurvey", "Acute_Typhi")
covars <- c('Country', 'Sex', 'Age', 'Antibiotics_taken_before_sampling_yes_no_assumptions')
to_combine <- c(output_folder_all_three, output_folder_blantyre, output_folder_dhaka, output_folder_kathmandu, output_folder_blantyre_dhaka)
# the_date doesn't have to be todays date, it's just the current working dir
the_date <- '2022.11.10'
dge_output <- combine_and_compare_dges(to_combine, covars, output_folder_all_three, output_folder_blantyre, output_folder_dhaka, output_folder_kathmandu, combined_output_root, the_date, groups_for_comparison)
## [1] "Bangladesh FDR <= 0.01"
## [1] 936
## [1] "Malawi FDR <= 0.01"
## [1] 738
## [1] "Nepal FDR <= 0.01"
## [1] 7
## [1] "malawi_bangladesh FDR <= 0.01"
## [1] 975
## [1] "Jaccard of unfiltered taxa of Bang, Mal, Nep = 0.425525525525526"
#options(scipen = 999)
#options(scipen = 0) # to switch scientific notation back on
dge_output$sig_up_cases_controls %>% select(species, malawi_bangladesh_logFC, malawi_bangladesh_FDR, bangladesh_logFC, bangladesh_FDR, malawi_logFC, malawi_FDR) %>% arrange(malawi_bangladesh_FDR) %>% kbl() %>% kable_styling()
|
species
|
malawi_bangladesh_logFC
|
malawi_bangladesh_FDR
|
bangladesh_logFC
|
bangladesh_FDR
|
malawi_logFC
|
malawi_FDR
|
|
Clostridium_P perfringens
|
3.418064
|
0.00e+00
|
4.081502
|
0.0000034
|
3.072498
|
0.0000026
|
|
Paeniclostridium sordellii
|
3.289270
|
0.00e+00
|
2.682375
|
0.0002721
|
3.595776
|
0.0000068
|
|
Prevotella copri
|
3.215595
|
0.00e+00
|
3.252965
|
0.0000498
|
2.964184
|
0.0002320
|
|
Selenomonas_C sp002351185
|
9.923578
|
0.00e+00
|
10.206262
|
0.0002912
|
9.230450
|
0.0002674
|
|
Dialister sp000434475
|
4.177501
|
0.00e+00
|
5.436875
|
0.0000238
|
2.962639
|
0.0089626
|
|
Romboutsia timonensis
|
3.023815
|
1.50e-06
|
2.826541
|
0.0029203
|
2.901554
|
0.0037327
|
|
Dialister sp002320515
|
6.723816
|
1.08e-05
|
11.336950
|
0.0004670
|
4.639173
|
0.0081446
|
|
Dialister sp002297935
|
6.799250
|
6.44e-05
|
9.320825
|
0.0001399
|
4.091471
|
0.0054813
|
dge_output$sig_down_cases_controls %>% select(species, malawi_bangladesh_logFC, malawi_bangladesh_FDR, bangladesh_logFC, bangladesh_FDR, malawi_logFC, malawi_FDR) %>% arrange(malawi_bangladesh_FDR) %>% kbl() %>% kable_styling()
|
species
|
malawi_bangladesh_logFC
|
malawi_bangladesh_FDR
|
bangladesh_logFC
|
bangladesh_FDR
|
malawi_logFC
|
malawi_FDR
|
|
Olsenella_C umbonata
|
-16.402044
|
0.0000000
|
-20.051264
|
0.0000000
|
-9.581691
|
0.0000001
|
|
KLE1796 sp001580115
|
-9.944016
|
0.0000000
|
-9.596562
|
0.0000000
|
-9.018228
|
0.0000003
|
|
F23-B02 sp003533405
|
-12.733131
|
0.0000000
|
-7.558436
|
0.0067086
|
-14.795353
|
0.0000000
|
|
Lactobacillus_B ruminis
|
-14.997411
|
0.0000000
|
-20.923936
|
0.0000000
|
-6.240719
|
0.0000000
|
|
Solobacterium sp900343155
|
-11.372181
|
0.0000000
|
-12.170168
|
0.0000000
|
-10.199193
|
0.0000021
|
|
Massiliomicrobiota timonensis
|
-9.393248
|
0.0000000
|
-9.634145
|
0.0000000
|
-8.858593
|
0.0000014
|
|
Clostridium_M bolteae
|
-7.291020
|
0.0000000
|
-13.725675
|
0.0000000
|
-2.768290
|
0.0000000
|
|
Bacteroides_A sp000434735
|
-7.367197
|
0.0000000
|
-12.098871
|
0.0000000
|
-5.133698
|
0.0000002
|
|
NK4A136 sp000687675
|
-8.925917
|
0.0000000
|
-8.574760
|
0.0000046
|
-8.927376
|
0.0000002
|
|
Faecalicatena sp000509105
|
-7.423736
|
0.0000000
|
-13.264389
|
0.0000000
|
-1.433171
|
0.0037446
|
|
Absiella innocuum
|
-8.426218
|
0.0000000
|
-15.003970
|
0.0000000
|
-2.045904
|
0.0020647
|
|
UBA7102 sp002492415
|
-9.964592
|
0.0000000
|
-7.492815
|
0.0000797
|
-11.503441
|
0.0000000
|
|
Eubacterium limosum_A
|
-9.896316
|
0.0000000
|
-10.367409
|
0.0000002
|
-8.275325
|
0.0000616
|
|
F0332 sp001652275
|
-11.136505
|
0.0000000
|
-11.153392
|
0.0000004
|
-9.863396
|
0.0000055
|
|
Solobacterium extructa
|
-9.594100
|
0.0000000
|
-9.831203
|
0.0000000
|
-8.998385
|
0.0017448
|
|
UBA1777 sp900321275
|
-10.158145
|
0.0000000
|
-8.402279
|
0.0035241
|
-11.012279
|
0.0000000
|
|
Pauljensenia odontolyticus
|
-8.981532
|
0.0000000
|
-12.233399
|
0.0000000
|
-8.978208
|
0.0074376
|
|
Lactobacillus_H mucosae
|
-14.070677
|
0.0000000
|
-17.176342
|
0.0000000
|
-5.787237
|
0.0035450
|
|
Solobacterium moorei
|
-9.220286
|
0.0000000
|
-14.583365
|
0.0000000
|
-3.485564
|
0.0031044
|
|
NK3B98 sp003150485
|
-10.321224
|
0.0000000
|
-7.451078
|
0.0001947
|
-11.495422
|
0.0000000
|
|
CAG-145 sp000435615
|
-7.670724
|
0.0000000
|
-10.694660
|
0.0000000
|
-4.286481
|
0.0013487
|
|
CAG-791 sp900321785
|
-8.370999
|
0.0000000
|
-8.096736
|
0.0000293
|
-8.260572
|
0.0009183
|
|
UBA1390 sp002305315
|
-8.493585
|
0.0000000
|
-8.183057
|
0.0000515
|
-8.410475
|
0.0004686
|
|
UBA1777 sp002371675
|
-11.680438
|
0.0000000
|
-9.493327
|
0.0094971
|
-11.210683
|
0.0000000
|
|
UBA2727 sp900315505
|
-8.520189
|
0.0000000
|
-8.247159
|
0.0000485
|
-8.496791
|
0.0055841
|
|
UBA1417 sp003531055
|
-7.788689
|
0.0000000
|
-16.684786
|
0.0000000
|
-1.795009
|
0.0060732
|
|
Eisenbergiella tayi
|
-2.253882
|
0.0000000
|
-2.286730
|
0.0007815
|
-1.720815
|
0.0037448
|
|
Streptococcus oralis_AE
|
-8.387973
|
0.0000000
|
-12.247159
|
0.0000000
|
-6.154434
|
0.0036509
|
|
Angelakisella sp003453215
|
-6.529870
|
0.0000002
|
-7.570891
|
0.0029393
|
-6.087831
|
0.0001393
|
|
Eubacterium_E hallii_A
|
-14.393519
|
0.0000002
|
-20.418227
|
0.0000000
|
-1.982034
|
0.0024501
|
|
Olsenella_E provencensis
|
-6.428614
|
0.0000005
|
-3.597808
|
0.0016418
|
-8.908750
|
0.0000197
|
|
Eggerthella lenta
|
-3.033424
|
0.0000006
|
-3.348238
|
0.0000273
|
-3.974787
|
0.0011601
|
|
Enteroscipio sp000270285
|
-10.664030
|
0.0000007
|
-9.986373
|
0.0016677
|
-9.678756
|
0.0000016
|
|
Eubacterium_F sp002435545
|
-3.661160
|
0.0000088
|
-8.004029
|
0.0001154
|
-2.521352
|
0.0064757
|
|
UBA1777 sp900319465
|
-7.605461
|
0.0000174
|
-8.115137
|
0.0056352
|
-5.681010
|
0.0080948
|
|
UBA6857 sp002438505
|
-7.444564
|
0.0038637
|
-3.443917
|
0.0048247
|
-5.673872
|
0.0089186
|
#covar_initials <- paste(str_sub(covars, 1, 6), sep = '', collapse = '')
#combined_dge_output_folder <- file.path(combined_output_root, paste(covar_initials, 'combined_edgeR'))